package org.jeecg.modules.webcrawler.job;

import java.io.IOException;
import java.net.URLEncoder;
import java.util.Date;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.jeecg.common.util.DateUtils;
import org.jeecg.common.util.RedisUtil;
import org.jeecg.modules.webcrawler.entity.WebCrawlerWord;
import org.jeecg.modules.webcrawler.util.WebCrawlerCacheUtils;
import org.jeecg.modules.webcrawler.util.bloomfilter.BloomFilterHelper;
import org.jeecg.modules.webcrawler.util.bloomfilter.BloomRedisService;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.quartz.Job;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.RedisTemplate;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import lombok.extern.slf4j.Slf4j;

/**
 * 邯郸广电网
 * 
 * @author Scott
 */
@Slf4j
public class TopRankingJob implements Job {
	
	@Autowired
    private RedisUtil redisUtil;
	
	@Override
	public void execute(JobExecutionContext jobExecutionContext) throws JobExecutionException {
		log.info(String.format("热搜排行榜" + DateUtils.getTimestamp()));
		get_top_baidu(111);
		get_top_weixin(112);
		get_top_weibo(113);
		
	}
	
	
	
	public  List<Map> get_top_baidu(int type){
		//整个html内容
		Document doc;
		List<Map> list = Lists.newArrayList();
		try {
			//Thread.sleep(10000);	//
			Connection conn = Jsoup.connect("http://top.baidu.com/buzz?b=1&fr=topindex").timeout(5000);
			conn.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
			conn.header("Accept-Encoding", "gzip, deflate, sdch");
			conn.header("Accept-Language", "zh-CN,zh;q=0.8");
			conn.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
			doc = conn.get();
			log.info("***************************************初始化  搜索热词-百度********************************************");
			Elements tablelist = doc.select(".list-table tr");
			tablelist.remove(0);
			tablelist.remove(1);
			tablelist.remove(2);
			tablelist.remove(3);
			if(!tablelist.isEmpty()) {
				for (Element info : tablelist) {
					Map<String, String> map = Maps.newConcurrentMap();
					map.put("title", info.select("a.list-title").text());
					map.put("url", info.select("a.list-title").attr("href"));
					System.out.println(info.select("td.last").text());
					map.put("count", info.select("td.last").text());
					list.add(map);
				}
			}
			if(list != null && list.size() > 0){
				String key = "top_ranking_baidu";
				redisUtil.set(key, list);
			}
			return list;
		} catch (Exception e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
			return list;
		} 
	}
	
	public List<Map> get_top_weibo(int type){
		//整个html内容
		Document doc;
		List<Map> list = Lists.newArrayList();
		try {
			//Thread.sleep(10000);	//
			Connection conn = Jsoup.connect("https://s.weibo.com/top/summary").timeout(5000);
			conn.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
			conn.header("Accept-Encoding", "gzip, deflate, sdch");
			conn.header("Accept-Language", "zh-CN,zh;q=0.8");
			conn.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
			doc = conn.get();
			log.info("*************************************微博热搜榜**********************************************");
			Elements tablelist = doc.getElementById("pl_top_realtimehot").select("table tr");
			tablelist.remove(0);
			tablelist.remove(0);
			if(!tablelist.isEmpty()) {
				for (Element info : tablelist) {
					Map<String, String> map = Maps.newConcurrentMap();
					map.put("title", info.select("td a").text());
					map.put("url", "https://s.weibo.com" + info.select("td a").attr("href"));
					map.put("count", info.select("td span").text());
					list.add(map);
				}
			}
			
	    	
			if(list != null && list.size() > 0){
				String key = "top_ranking_weibo";
				redisUtil.set(key, list);
			}
			
			return list;
		} catch (Exception e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
			return list;
		} 
	}
	public  List<Map> get_top_weixin(int type){
		//整个html内容
		Document doc;
		List<Map> list = Lists.newArrayList();
		try {
			//Thread.sleep(10000);	//
			Connection conn = Jsoup.connect("http://www.gsdata.cn/rank/wxarc").timeout(5000);
			conn.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
			conn.header("Accept-Encoding", "gzip, deflate, sdch");
			conn.header("Accept-Language", "zh-CN,zh;q=0.8");
			conn.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
			doc = conn.get();
			log.info("***************************************最近24小时 微信文章排行榜  ********************************************");
			Elements tablelist = doc.getElementById("rank_data").select("tr");
			tablelist.remove(0);
			if(!tablelist.isEmpty()) {
				for (Element info : tablelist) {
					Map<String, String> map = Maps.newConcurrentMap();
					map.put("title", info.select("a").first().text());
					map.put("url", info.select("a").first().attr("href"));
					map.put("count", info.select("td").get(3).text());
					list.add(map);
				}
			}
			if(list != null && list.size() > 0){
				String key = "top_ranking_weixin";
				redisUtil.set(key, list);
			}
			return list;
		} catch (Exception e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
			return list;
		} 
	}
	
	
	
	
	public static void main(String[] args) {
		//整个html内容
				Document doc;
				List<Map> list = Lists.newArrayList();
				try {
					//Thread.sleep(10000);	//
					Connection conn = Jsoup.connect("http://top.baidu.com/buzz?b=1&fr=topindex").timeout(5000);
					conn.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
					conn.header("Accept-Encoding", "gzip, deflate, sdch");
					conn.header("Accept-Language", "zh-CN,zh;q=0.8");
					conn.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");
					doc = conn.get();
					log.info("***************************************初始化  搜索热词-微信********************************************");
					Elements tablelist = doc.select(".list-table tr");
					tablelist.remove(0);
					tablelist.remove(1);
					tablelist.remove(2);
					tablelist.remove(3);
					
					if(!tablelist.isEmpty()) {
						int i = 0;
						for (Element info : tablelist) {
							Map<String, String> map = Maps.newConcurrentMap();
							System.out.println("");
							System.out.println("----------------------第"+(i++)+"次-----------------------");
							System.out.println(info.select("a.list-title").text());
							System.out.println(info.select("a.list-title").attr("href"));
							System.out.println(info.select("td.last").text());
							
							System.out.println("");
						}
					}
				
				} catch (Exception e) {
					// TODO Auto-generated catch block
					e.printStackTrace();
				} 
	}
}
